Q10 is aspirations

Q7_Q7_1 is JavaScript skills

Q7_Q7_2 is mashup skills

Q8_Q8_1 is general programming skills

# Read input/commits_novelty.csv and keep only rows with no missing values.
df <- read.csv("input/commits_novelty.csv", header = TRUE, sep = ",")
df <- df[complete.cases(df), ]
df

# Treat group as a categorical predictor.
df$group <- factor(df$group)

# Derive log(x + 1) versions of the similarity and count columns.
df$log_relational_novelty <- log(df$similarity + 1)
df$log_count <- log(df$count + 1)
df

# Standardise (centre and scale) the skill/aspiration items and both
# log-transformed columns.
cols <- c(
  "Q7_Q7_1", "Q7_Q7_2", "Q8_Q8_1", "Q10",
  "log_relational_novelty", "log_count"
)
df[cols] <- scale(df[cols])
df

# Regress standardised log_count on group.
mod <- lm(log_count ~ factor(group), data = df)
summary(mod)

Call:
lm(formula = log_count ~ factor(group), data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.1938 -0.9742 -0.1165  0.5462  3.4873 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)  
(Intercept)    -0.07037    0.08193  -0.859   0.3907  
factor(group)1  0.21956    0.11368   1.931   0.0539 .
factor(group)2 -0.06328    0.11773  -0.537   0.5911  
factor(group)3  0.10161    0.11301   0.899   0.3689  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.9968 on 607 degrees of freedom
Multiple R-squared:  0.01137,   Adjusted R-squared:  0.006485 
F-statistic: 2.327 on 3 and 607 DF,  p-value: 0.07358
# Survey items only: aspirations (Q10) and the three skill items.
mod <- lm(
  log_relational_novelty ~ Q10 + Q8_Q8_1 + Q7_Q7_1 + Q7_Q7_2,
  data = df
)
summary(mod)

Call:
lm(formula = log_relational_novelty ~ Q10 + Q8_Q8_1 + Q7_Q7_1 + 
    Q7_Q7_2, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.1587 -0.4105  0.3565  0.7363  1.3073 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)   
(Intercept) -6.293e-16  4.006e-02   0.000  1.00000   
Q10          1.887e-02  4.252e-02   0.444  0.65747   
Q8_Q8_1      8.371e-02  4.447e-02   1.882  0.06026 . 
Q7_Q7_1     -7.379e-02  4.973e-02  -1.484  0.13836   
Q7_Q7_2      1.383e-01  5.077e-02   2.725  0.00662 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.9901 on 606 degrees of freedom
Multiple R-squared:  0.02607,   Adjusted R-squared:  0.01964 
F-statistic: 4.055 on 4 and 606 DF,  p-value: 0.002976
# Single-predictor model: log_count only.
mod <- lm(log_relational_novelty ~ log_count, data = df)
summary(mod)

Call:
lm(formula = log_relational_novelty ~ log_count, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.2833 -0.4112  0.2627  0.6549  1.4603 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -1.026e-16  3.743e-02    0.00        1    
log_count    3.816e-01  3.746e-02   10.19   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.9251 on 609 degrees of freedom
Multiple R-squared:  0.1456,    Adjusted R-squared:  0.1442 
F-statistic: 103.8 on 1 and 609 DF,  p-value: < 2.2e-16
# Full additive model: group, log_count and all four survey items.
mod <- lm(
  log_relational_novelty ~ factor(group) + log_count +
    Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10,
  data = df
)
summary(mod)

Call:
lm(formula = log_relational_novelty ~ factor(group) + log_count + 
    Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.2745 -0.4240  0.2415  0.6170  1.4761 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)    -0.17463    0.07542  -2.315  0.02093 *  
factor(group)1  0.13181    0.10606   1.243  0.21441    
factor(group)2  0.24455    0.10863   2.251  0.02474 *  
factor(group)3  0.31475    0.10396   3.028  0.00257 ** 
log_count       0.37561    0.03755  10.003  < 2e-16 ***
Q7_Q7_1        -0.05097    0.04623  -1.102  0.27070    
Q7_Q7_2         0.12593    0.04732   2.661  0.00799 ** 
Q8_Q8_1         0.05415    0.04118   1.315  0.18902    
Q10            -0.02165    0.04012  -0.540  0.58955    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.9124 on 602 degrees of freedom
Multiple R-squared:  0.1784,    Adjusted R-squared:  0.1675 
F-statistic: 16.34 on 8 and 602 DF,  p-value: < 2.2e-16
# Stage nested within group (one stage term per group) plus the covariates.
# NOTE(review): stage has not yet been converted to a factor at this point in
# the script, so it presumably enters as a numeric slope - confirm.
mod <- lm(
  log_relational_novelty ~ factor(group) / stage + log_count +
    Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10,
  data = df
)
summary(mod)

Call:
lm(formula = log_relational_novelty ~ factor(group)/stage + log_count + 
    Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.3576 -0.4230  0.2610  0.6157  1.6005 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)          -0.40408    0.18391  -2.197  0.02840 *  
factor(group)1        0.26077    0.25526   1.022  0.30739    
factor(group)2        0.09840    0.28683   0.343  0.73169    
factor(group)3        0.43213    0.25337   1.706  0.08861 .  
log_count             0.38321    0.03765  10.177  < 2e-16 ***
Q7_Q7_1              -0.05030    0.04616  -1.090  0.27624    
Q7_Q7_2               0.12720    0.04724   2.693  0.00729 ** 
Q8_Q8_1               0.05223    0.04112   1.270  0.20454    
Q10                  -0.02287    0.04005  -0.571  0.56822    
factor(group)0:stage  0.09193    0.06719   1.368  0.17174    
factor(group)1:stage  0.03997    0.06441   0.621  0.53515    
factor(group)2:stage  0.13794    0.07560   1.825  0.06856 .  
factor(group)3:stage  0.04473    0.06362   0.703  0.48225    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.9109 on 598 degrees of freedom
Multiple R-squared:  0.1866,    Adjusted R-squared:  0.1703 
F-statistic: 11.43 on 12 and 598 DF,  p-value: < 2.2e-16
# Group crossed with stage (main effects plus interaction) plus the covariates.
mod <- lm(
  log_relational_novelty ~ factor(group) * stage + log_count +
    Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10,
  data = df
)
summary(mod)

Call:
lm(formula = log_relational_novelty ~ factor(group) * stage + 
    log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.3576 -0.4230  0.2610  0.6157  1.6005 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)          -0.40408    0.18391  -2.197  0.02840 *  
factor(group)1        0.26077    0.25526   1.022  0.30739    
factor(group)2        0.09840    0.28683   0.343  0.73169    
factor(group)3        0.43213    0.25337   1.706  0.08861 .  
stage                 0.09193    0.06719   1.368  0.17174    
log_count             0.38321    0.03765  10.177  < 2e-16 ***
Q7_Q7_1              -0.05030    0.04616  -1.090  0.27624    
Q7_Q7_2               0.12720    0.04724   2.693  0.00729 ** 
Q8_Q8_1               0.05223    0.04112   1.270  0.20454    
Q10                  -0.02287    0.04005  -0.571  0.56822    
factor(group)1:stage -0.05196    0.09304  -0.558  0.57671    
factor(group)2:stage  0.04601    0.10094   0.456  0.64869    
factor(group)3:stage -0.04720    0.09254  -0.510  0.61023    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.9109 on 598 degrees of freedom
Multiple R-squared:  0.1866,    Adjusted R-squared:  0.1703 
F-statistic: 11.43 on 12 and 598 DF,  p-value: < 2.2e-16
# Model proposed by stepwise regression: group, log_count and Q7_Q7_2.
library(stats)
mod <- lm(
  log_relational_novelty ~ factor(group) + log_count + Q7_Q7_2,
  data = df
)
summary(mod)

Call:
lm(formula = log_relational_novelty ~ factor(group) + log_count + 
    Q7_Q7_2, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.1984 -0.4152  0.2400  0.6402  1.5394 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)    -0.17752    0.07523  -2.360  0.01861 *  
factor(group)1  0.12829    0.10455   1.227  0.22027    
factor(group)2  0.25395    0.10827   2.345  0.01932 *  
factor(group)3  0.32098    0.10365   3.097  0.00205 ** 
log_count       0.37833    0.03716  10.180  < 2e-16 ***
Q7_Q7_2         0.10987    0.03713   2.959  0.00320 ** 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.9122 on 605 degrees of freedom
Multiple R-squared:  0.1747,    Adjusted R-squared:  0.1679 
F-statistic: 25.61 on 5 and 605 DF,  p-value: < 2.2e-16
AIC(mod)
[1] 1629.635
BIC(mod)
[1] 1660.541
# Same covariates as the full additive model, but with the group factor removed.
library(stats)
mod <- lm(
  log_relational_novelty ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10,
  data = df
)
summary(mod)

Call:
lm(formula = log_relational_novelty ~ log_count + Q7_Q7_1 + Q7_Q7_2 + 
    Q8_Q8_1 + Q10, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.3239 -0.3892  0.2683  0.6427  1.5709 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -3.517e-16  3.714e-02   0.000  1.00000    
log_count    3.765e-01  3.764e-02  10.003  < 2e-16 ***
Q7_Q7_1     -5.352e-02  4.615e-02  -1.160  0.24666    
Q7_Q7_2      1.358e-01  4.707e-02   2.885  0.00406 ** 
Q8_Q8_1      6.056e-02  4.129e-02   1.467  0.14301    
Q10         -3.134e-02  3.974e-02  -0.788  0.43073    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.9179 on 605 degrees of freedom
Multiple R-squared:  0.1643,    Adjusted R-squared:  0.1574 
F-statistic: 23.78 on 5 and 605 DF,  p-value: < 2.2e-16
AIC(mod)
[1] 1637.293
BIC(mod)
[1] 1668.199
# mod.1: reduced model without group (log_count and Q7_Q7_2 only).
library(stats)
mod.1 <- lm(
  log_relational_novelty ~ log_count + Q7_Q7_2,
  data = df
)
summary(mod.1)

Call:
lm(formula = log_relational_novelty ~ log_count + Q7_Q7_2, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.2448 -0.4150  0.2665  0.6621  1.5082 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -3.442e-16  3.715e-02   0.000  1.00000    
log_count    3.781e-01  3.719e-02  10.164  < 2e-16 ***
Q7_Q7_2      1.184e-01  3.719e-02   3.184  0.00153 ** 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.9182 on 608 degrees of freedom
Multiple R-squared:  0.1596,    Adjusted R-squared:  0.1568 
F-statistic: 57.74 on 2 and 608 DF,  p-value: < 2.2e-16
AIC(mod.1)
[1] 1634.693
BIC(mod.1)
[1] 1652.353
# mod.2: mod.1's predictors plus the group factor.
library(stats)
mod.2 <- lm(
  log_relational_novelty ~ factor(group) + log_count + Q7_Q7_2,
  data = df
)
summary(mod.2)

Call:
lm(formula = log_relational_novelty ~ factor(group) + log_count + 
    Q7_Q7_2, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.1984 -0.4152  0.2400  0.6402  1.5394 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)    -0.17752    0.07523  -2.360  0.01861 *  
factor(group)1  0.12829    0.10455   1.227  0.22027    
factor(group)2  0.25395    0.10827   2.345  0.01932 *  
factor(group)3  0.32098    0.10365   3.097  0.00205 ** 
log_count       0.37833    0.03716  10.180  < 2e-16 ***
Q7_Q7_2         0.10987    0.03713   2.959  0.00320 ** 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.9122 on 605 degrees of freedom
Multiple R-squared:  0.1747,    Adjusted R-squared:  0.1679 
F-statistic: 25.61 on 5 and 605 DF,  p-value: < 2.2e-16
AIC(mod.2)
[1] 1629.635
BIC(mod.2)
[1] 1660.541
# Nested-model F test: does adding the group factor to
# log_count + Q7_Q7_2 significantly reduce the residual sum of squares?
anova(
  mod.1,
  mod.2
)
Analysis of Variance Table

Model 1: log_relational_novelty ~ log_count + Q7_Q7_2
Model 2: log_relational_novelty ~ factor(group) + log_count + Q7_Q7_2
  Res.Df    RSS Df Sum of Sq     F  Pr(>F)  
1    608 512.64                             
2    605 503.44  3    9.1943 3.683 0.01195 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
library(ALSM)
Loading required package: leaps
Loading required package: SuppDists
Loading required package: car
Loading required package: carData
# Stepwise selection by AIC, starting from the full additive model.
#
# `step()` has no `method` argument: the original `method = "both"` was
# swallowed by `...` and ignored, so the search fell back to the default for a
# missing `scope` (backward elimination only). `direction = "both"` is the
# documented way to allow dropped terms to re-enter; with `scope` missing the
# starting model serves as the upper bound. Any trace recorded from the old
# call lists only removals and should be regenerated.
step(
  lm(
    log_relational_novelty ~ factor(group) + log_count +
      Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10,
    data = df
  ),
  direction = "both",
  trace = 1
)
Start:  AIC=-103.05
log_relational_novelty ~ factor(group) + log_count + Q7_Q7_1 + 
    Q7_Q7_2 + Q8_Q8_1 + Q10

                Df Sum of Sq    RSS      AIC
- Q10            1     0.243 501.43 -104.753
- Q7_Q7_1        1     1.012 502.20 -103.816
- Q8_Q8_1        1     1.440 502.63 -103.296
<none>                       501.19 -103.049
- factor(group)  3     8.603 509.79  -98.650
- Q7_Q7_2        1     5.897 507.08  -97.902
- log_count      1    83.308 584.50  -11.096

Step:  AIC=-104.75
log_relational_novelty ~ factor(group) + log_count + Q7_Q7_1 + 
    Q7_Q7_2 + Q8_Q8_1

                Df Sum of Sq    RSS      AIC
- Q7_Q7_1        1     1.022 502.45 -105.509
- Q8_Q8_1        1     1.258 502.69 -105.222
<none>                       501.43 -104.753
- factor(group)  3     8.884 510.31 -100.022
- Q7_Q7_2        1     5.680 507.11  -99.870
- log_count      1    83.348 584.78  -12.801

Step:  AIC=-105.51
log_relational_novelty ~ factor(group) + log_count + Q7_Q7_2 + 
    Q8_Q8_1

                Df Sum of Sq    RSS      AIC
- Q8_Q8_1        1     0.989 503.44 -106.308
<none>                       502.45 -105.509
- Q7_Q7_2        1     4.754 507.21 -101.756
- factor(group)  3     9.044 511.50 -100.609
- log_count      1    84.566 587.02  -12.465

Step:  AIC=-106.31
log_relational_novelty ~ factor(group) + log_count + Q7_Q7_2

                Df Sum of Sq    RSS      AIC
<none>                       503.44 -106.308
- factor(group)  3     9.194 512.64 -101.250
- Q7_Q7_2        1     7.287 510.73  -99.527
- log_count      1    86.242 589.68  -11.697

Call:
lm(formula = log_relational_novelty ~ factor(group) + log_count + 
    Q7_Q7_2, data = df)

Coefficients:
   (Intercept)  factor(group)1  factor(group)2  factor(group)3       log_count         Q7_Q7_2  
       -0.1775          0.1283          0.2540          0.3210          0.3783          0.1099  
# Group plus the four survey items, without log_count.
mod <- lm(
  log_relational_novelty ~ factor(group) + Q10 + Q8_Q8_1 + Q7_Q7_1 + Q7_Q7_2,
  data = df
)
summary(mod)

Call:
lm(formula = log_relational_novelty ~ factor(group) + Q10 + Q8_Q8_1 + 
    Q7_Q7_1 + Q7_Q7_2, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.1605 -0.4467  0.3399  0.7208  1.4666 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)   
(Intercept)    -0.19601    0.08135  -2.409  0.01627 * 
factor(group)1  0.19557    0.11423   1.712  0.08741 . 
factor(group)2  0.22325    0.11719   1.905  0.05726 . 
factor(group)3  0.35026    0.11211   3.124  0.00187 **
Q10             0.02321    0.04301   0.540  0.58961   
Q8_Q8_1         0.07471    0.04438   1.683  0.09280 . 
Q7_Q7_1        -0.07583    0.04981  -1.522  0.12849   
Q7_Q7_2         0.13346    0.05105   2.614  0.00917 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.9845 on 603 degrees of freedom
Multiple R-squared:  0.04181,   Adjusted R-squared:  0.03069 
F-statistic: 3.759 on 7 and 603 DF,  p-value: 0.0005336

Nest Phase in Group [ Linear Mixed Model ]

# Recode user2, stage and group as nominal factors in one pass.
nominal_cols <- c("user2", "stage", "group")
df[nominal_cols] <- lapply(df[nominal_cols], factor)

# Per-cell (group x stage) descriptive statistics of the response.
library(plyr)
ddply(
  df,
  ~ group * stage,
  function(cell) summary(cell$log_relational_novelty)
)
ddply(
  df,
  ~ group * stage,
  summarise,
  log_relational_novelty.mean = mean(log_relational_novelty),
  log_relational_novelty.sd = sd(log_relational_novelty)
)
# Histograms of the response within every observed group x stage cell.
# A loop replaces sixteen copy-pasted calls and gives each panel a readable
# title instead of the deparsed subsetting expression.
for (group_level in sort(unique(as.character(df$group)))) {
  for (stage_level in sort(unique(as.character(df$stage)))) {
    in_cell <- df$group == group_level & df$stage == stage_level
    if (!any(in_cell)) {
      next # hist() cannot draw an empty cell
    }
    hist(
      df$log_relational_novelty[in_cell],
      main = paste0("Group ", group_level, ", stage ", stage_level),
      xlab = "log_relational_novelty"
    )
  }
}

# Side-by-side boxplots for every group/stage combination.
boxplot(
  log_relational_novelty ~ group * stage,
  data = df,
  xlab = "Group.Stage",
  ylab = "log_relational_novelty"
)

# Interaction plot of cell means. The response was standardised earlier in the
# script, so cell means can be negative; the previous
# ylim = c(0, max(log_relational_novelty)) clipped anything below zero.
# The default y-range (the range of the cell means) shows every line.
with(df, interaction.plot(group, stage, log_relational_novelty))

# library for LMM we will use on relational novelty 

library(lme4)
library(lmerTest)
library(car)

set sum-to-zero contrasts for the Anova cells

# Assign sum-to-zero contrasts to both design factors so that the Type III
# tests that follow are interpretable.
#
# The original lines used `<=` (a comparison) instead of `<-` (assignment):
# they only compared the existing contrast matrices with the string
# "contr.sum", printed logical matrices, and left the contrasts unchanged.
# Model output recorded after this point was produced before the fix and
# needs to be regenerated.
contrasts(df$group) <- "contr.sum"
contrasts(df$stage) <- "contr.sum"
# Mixed model with stage nested within group as fixed effects and a random
# intercept per user2, fitted by maximum likelihood (REML = FALSE).
full.model <- lmer(
  log_relational_novelty ~ group / stage + (1 | user2),
  data = df,
  REML = FALSE
)
# NOTE(review): car::Anova documents this argument as `test.statistic`
# (singular). The recorded result is a Wald chi-square table, so the
# misspelled argument appears to be ignored. F tests for lmer fits may also
# require a REML fit - confirm before correcting the spelling.
Anova(full.model, type = 3, test.statistics = "F")
Analysis of Deviance Table (Type III Wald chisquare tests)

Response: log_relational_novelty
              Chisq Df Pr(>Chisq)
(Intercept)  2.5598  1     0.1096
group        2.0162  3     0.5691
group:stage 11.8367 12     0.4589
full.model
Linear mixed model fit by maximum likelihood  ['lmerModLmerTest']
Formula: log_relational_novelty ~ group/stage + (1 | user2)
   Data: df
      AIC       BIC    logLik  deviance  df.resid 
1222.7027 1302.1745 -593.3514 1186.7027       593 
Random effects:
 Groups   Name        Std.Dev.
 user2    (Intercept) 0.8765  
 Residual             0.4483  
Number of obs: 611, groups:  user2, 157
Fixed Effects:
  (Intercept)         group1         group2         group3  group0:stage2  group1:stage2  
     -0.25894        0.15903        0.18673        0.31581       -0.01712        0.11397  
group2:stage2  group3:stage2  group0:stage3  group1:stage3  group2:stage3  group3:stage3  
      0.03887        0.09815        0.07843        0.13950        0.17836        0.16036  
group0:stage4  group1:stage4  group2:stage4  group3:stage4  
      0.09115        0.10249        0.21386        0.13686  
library(performance)

check_collinearity(full.model)
# Check for Multicollinearity

Low Correlation

        Term  VIF   VIF 95% CI Increased SE Tolerance Tolerance 95% CI
       group 2.01 [1.80, 2.27]         1.42      0.50     [0.44, 0.55]
 group:stage 2.01 [1.80, 2.27]         1.42      0.50     [0.44, 0.55]

Variability in relational novelty is much higher between individual users (random-intercept variance of about 0.79) than between stages/phases (about 0.003). The remaining residual variability of about 0.2035 comes from factors other than individual users and stage. Group 1 is estimated to have higher relational novelty than group 0 by about 0.2099, and group 3 by about 0.3765, although neither difference is significant at the 0.05 level (p = 0.320 and p = 0.072, respectively).

# Variance-components model: group as a fixed effect, with crossed random
# intercepts for user2 and for stage (default REML fit).
var.model <- lmer(
  log_relational_novelty ~ factor(group) + (1 | user2) + (1 | stage),
  data = df
)
summary(var.model)
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: log_relational_novelty ~ factor(group) + (1 | user2) + (1 | stage)
   Data: df

REML criterion at convergence: 1203.8

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-4.2978 -0.2484  0.0522  0.2410  3.5006 

Random effects:
 Groups   Name        Variance Std.Dev.
 user2    (Intercept) 0.789432 0.88850 
 stage    (Intercept) 0.002792 0.05284 
 Residual             0.203477 0.45108 
Number of obs: 611, groups:  user2, 157; stage, 4

Fixed effects:
               Estimate Std. Error       df t value Pr(>|t|)  
(Intercept)     -0.2208     0.1530 146.2072  -1.443    0.151  
factor(group)1   0.2099     0.2103 152.3350   0.998    0.320  
factor(group)2   0.2659     0.2097 154.1335   1.268    0.207  
factor(group)3   0.3765     0.2079 152.4574   1.812    0.072 .
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) fct()1 fct()2
factr(grp)1 -0.706              
factr(grp)2 -0.708  0.515       
factr(grp)3 -0.714  0.519  0.521
# Reduced linear model: covariates only, no group factor.
reduced.model <- lm(
  log_relational_novelty ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10,
  data = df
)
summary(reduced.model)

Call:
lm(formula = log_relational_novelty ~ log_count + Q7_Q7_1 + Q7_Q7_2 + 
    Q8_Q8_1 + Q10, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.3239 -0.3892  0.2683  0.6427  1.5709 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -3.517e-16  3.714e-02   0.000  1.00000    
log_count    3.765e-01  3.764e-02  10.003  < 2e-16 ***
Q7_Q7_1     -5.352e-02  4.615e-02  -1.160  0.24666    
Q7_Q7_2      1.358e-01  4.707e-02   2.885  0.00406 ** 
Q8_Q8_1      6.056e-02  4.129e-02   1.467  0.14301    
Q10         -3.134e-02  3.974e-02  -0.788  0.43073    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.9179 on 605 degrees of freedom
Multiple R-squared:  0.1643,    Adjusted R-squared:  0.1574 
F-statistic: 23.78 on 5 and 605 DF,  p-value: < 2.2e-16
# Full linear model: the reduced model plus the group factor.
# This rebinds `full.model`, which previously held the lmer fit.
full.model <- lm(
  log_relational_novelty ~ factor(group) + log_count +
    Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10,
  data = df
)
summary(full.model)

Call:
lm(formula = log_relational_novelty ~ factor(group) + log_count + 
    Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.2745 -0.4240  0.2415  0.6170  1.4761 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)    -0.17463    0.07542  -2.315  0.02093 *  
factor(group)1  0.13181    0.10606   1.243  0.21441    
factor(group)2  0.24455    0.10863   2.251  0.02474 *  
factor(group)3  0.31475    0.10396   3.028  0.00257 ** 
log_count       0.37561    0.03755  10.003  < 2e-16 ***
Q7_Q7_1        -0.05097    0.04623  -1.102  0.27070    
Q7_Q7_2         0.12593    0.04732   2.661  0.00799 ** 
Q8_Q8_1         0.05415    0.04118   1.315  0.18902    
Q10            -0.02165    0.04012  -0.540  0.58955    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.9124 on 602 degrees of freedom
Multiple R-squared:  0.1784,    Adjusted R-squared:  0.1675 
F-statistic: 16.34 on 8 and 602 DF,  p-value: < 2.2e-16
anova(reduced.model, full.model)
Analysis of Variance Table

Model 1: log_relational_novelty ~ log_count + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + 
    Q10
Model 2: log_relational_novelty ~ factor(group) + log_count + Q7_Q7_1 + 
    Q7_Q7_2 + Q8_Q8_1 + Q10
  Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
1    605 509.79                              
2    602 501.19  3     8.603 3.4445 0.01652 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Boxplots for every stage/group combination; the four fill colours are
# recycled across the boxes.
boxplot(
  log_relational_novelty ~ stage * group,
  data = df,
  col = c("white", "lightgray", "blue", "green")
)

check_collinearity(full.model)
# Check for Multicollinearity

Low Correlation

          Term  VIF   VIF 95% CI Increased SE Tolerance Tolerance 95% CI
 factor(group) 1.09 [1.03, 1.25]         1.04      0.92     [0.80, 0.97]
     log_count 1.03 [1.00, 1.41]         1.02      0.97     [0.71, 1.00]
       Q7_Q7_1 1.57 [1.42, 1.76]         1.25      0.64     [0.57, 0.70]
       Q7_Q7_2 1.64 [1.49, 1.85]         1.28      0.61     [0.54, 0.67]
       Q8_Q8_1 1.24 [1.15, 1.39]         1.11      0.80     [0.72, 0.87]
           Q10 1.18 [1.10, 1.32]         1.09      0.85     [0.76, 0.91]
library(car)

vif(full.model)
                  GVIF Df GVIF^(1/(2*Df))
factor(group) 1.086998  3        1.014000
log_count     1.033040  1        1.016386
Q7_Q7_1       1.566162  1        1.251464
Q7_Q7_2       1.640509  1        1.280824
Q8_Q8_1       1.242569  1        1.114706
Q10           1.179095  1        1.085861
vif(reduced.model)
log_count   Q7_Q7_1   Q7_Q7_2   Q8_Q8_1       Q10 
 1.025572  1.541842  1.603686  1.234487  1.143380 
library(multcomp)
library(lsmeans)
#summary(glht(full.model, lsm(pairwise ~ group / stage)), test = adjusted(type='holm'))
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKIyBRMTAgaXMgYXNwaXJhdGlvbnMgCiMgUTdfN18xIGlzIGphdmFzY3JpcHQgc2tpbGxzIAojIFE3XzdfMiBpcyBtYXNodXAgc2tpbGxzIAojIFE4XzhfMSBpcyBnZW5lcmFsIHByb2dyYW1taW5nIHNraWxscyAKCmBgYHtyfQojIHJlYWRpbmcgaW5wdXQgCmRmPC1yZWFkLmNzdigiaW5wdXQvY29tbWl0c19ub3ZlbHR5LmNzdiIsIGhlYWRlciA9VFJVRSwgc2VwPSIsIikKZGYgPC0gZGZbY29tcGxldGUuY2FzZXMoZGYpLCBdICAKZGYKYGBgCgpgYGB7cn0KZGYkZ3JvdXAgPSBmYWN0b3IoZGYkZ3JvdXApCmBgYAoKCmBgYHtyfQojIGNyZWF0ZSBuZXcgY29sdW1ucyBjYWxsZWQgbG9nIHJlbGF0aW9uYWwgbm92ZWx0eQpkZiRsb2dfcmVsYXRpb25hbF9ub3ZlbHR5IDwtIGxvZyhkZiRzaW1pbGFyaXR5KzEpIApkZiRsb2dfY291bnQgPC0gbG9nKGRmJGNvdW50KzEpIApkZgpgYGAKCgpgYGB7cn0KIyBzdGFuZGFyZGl6aW5nIHZhcmlhYmxlcyBmb3Igc2tpbGxzIGFuZCBhc3BpcmF0aW9ucy4gCmNvbHMgPC0gYygiUTdfUTdfMSIsICJRN19RN18yIiwgIlE4X1E4XzEiLCAiUTEwIiwgImxvZ19yZWxhdGlvbmFsX25vdmVsdHkiLCAibG9nX2NvdW50IikKZGZbY29sc10gPC0gc2NhbGUoZGZbY29sc10pCmRmCmBgYAoKCmBgYHtyfQptb2QgPC0gbG0obG9nX2NvdW50IH4gZmFjdG9yKGdyb3VwKSwgZGF0YT1kZikKc3VtbWFyeShtb2QpCmBgYAoKCmBgYHtyfQptb2QgPC0gbG0oIGxvZ19yZWxhdGlvbmFsX25vdmVsdHkgfiBRMTAgKyBROF9ROF8xICsgUTdfUTdfMSArIFE3X1E3XzIsIGRhdGEgPSBkZikKc3VtbWFyeShtb2QpCmBgYAoKYGBge3J9Cm1vZCA8LSBsbSggbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSB+IGxvZ19jb3VudCAsIGRhdGEgPSBkZikKc3VtbWFyeShtb2QpCmBgYAoKYGBge3J9Cm1vZCA8LSBsbSggbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSB+IGZhY3Rvcihncm91cCkgKyBsb2dfY291bnQgKyBRN19RN18xICsgUTdfUTdfMiArIFE4X1E4XzEgKyBRMTAgLCBkYXRhID0gZGYpCnN1bW1hcnkobW9kKQpgYGAKYGBge3J9Cm1vZCA8LSBsbSggbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSB+IGZhY3Rvcihncm91cCkvc3RhZ2UgKyBsb2dfY291bnQgKyBRN19RN18xICsgUTdfUTdfMiArIFE4X1E4XzEgKyBRMTAgLCBkYXRhID0gZGYpCnN1bW1hcnkobW9kKQpgYGAKCmBgYHtyfQptb2QgPC0gbG0oIGxvZ19yZWxhdGlvbmFsX25vdmVsdHkgfiBmYWN0b3IoZ3JvdXApICogc3RhZ2UgKyBsb2dfY291bnQgKyBRN19RN18xICsgUTdfUTdfMiArIFE4X1E4XzEgKyBRMTAgLCBkYXRhID0gZGYpCnN1bW1hcnkobW9kKQpgYGAKCgoKYGBge3J9CiMgUHJvcG9zZWQgbW9kZWwgYnkgc3RlcHdpc2UgcmVncmVzc2lvbgpsaWJyYXJ5KHN0YXRzKQptb2QgPC0gbG0oIGxvZ19yZWxhdGlvbmFsX25vdmVsdHkgfiBmYWN0b3IoZ3JvdXApICsgbG9nX2NvdW50ICsgUTdfUTdf
MiAsIGRhdGEgPSBkZikKc3VtbWFyeShtb2QpCkFJQyhtb2QpCkJJQyhtb2QpCmBgYAoKYGBge3J9CiMgd2l0aG91dCB0aGUgZmFjdG9yICggZ3JvdXAgKSBhbmQgd2l0aCBhbGwgY29uZm91bmRpbmcgdmFyaWFibGVzIApsaWJyYXJ5KHN0YXRzKQptb2QgPC0gbG0oIGxvZ19yZWxhdGlvbmFsX25vdmVsdHkgfiBsb2dfY291bnQgKyBRN19RN18xICsgUTdfUTdfMiArIFE4X1E4XzEgKyBRMTAgLCBkYXRhID0gZGYpCnN1bW1hcnkobW9kKQpBSUMobW9kKQpCSUMobW9kKQpgYGAKCmBgYHtyfQpsaWJyYXJ5KHN0YXRzKQptb2QuMSA8LSBsbSggbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSB+IGxvZ19jb3VudCArIFE3X1E3XzIgLCBkYXRhID0gZGYpCnN1bW1hcnkobW9kLjEpCkFJQyhtb2QuMSkKQklDKG1vZC4xKQpgYGAKCmBgYHtyfQpsaWJyYXJ5KHN0YXRzKQptb2QuMiA8LSBsbSggbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSB+IGZhY3Rvcihncm91cCkgKyBsb2dfY291bnQgKyBRN19RN18yICwgZGF0YSA9IGRmKQpzdW1tYXJ5KG1vZC4yKQpBSUMobW9kLjIpCkJJQyhtb2QuMikKYGBgCgpgYGB7cn0KIyBtb2RlbCB3aXRoIGFuZCB3aXRob3V0IGdyb3VwcyBhcmUgdmVyeSBkaWZmZXJlbnQgKCBzaWduaWZpY2FudCApCmFub3ZhKG1vZC4xLCBtb2QuMikKYGBgCgoKYGBge3J9CmxpYnJhcnkoQUxTTSkKc3RlcChsbShsb2dfcmVsYXRpb25hbF9ub3ZlbHR5IH4gZmFjdG9yKGdyb3VwKSArIGxvZ19jb3VudCArIFE3X1E3XzEgKyBRN19RN18yICsgUThfUThfMSArIFExMCwgZGF0YT1kZiksCm1ldGhvZD0iYm90aCIsIHRyYWNlID0gMSApCmBgYAoKCgoKYGBge3J9Cm1vZCA8LSBsbSggbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSB+IGZhY3Rvcihncm91cCkgKyBRMTAgKyBROF9ROF8xICsgUTdfUTdfMSArIFE3X1E3XzIgLCBkYXRhID0gZGYpCnN1bW1hcnkobW9kKQpgYGAKCiMjIE5lc3QgUGhhc2UgaW4gR3JvdXAgWyBMaW5lYXIgTWl4ZWQgTW9kZWwgXQoKYGBge3J9CiMgY29udmVydCB0byBub21pbmFsIGZhY3RvcgpkZiR1c2VyMiA9IGZhY3RvcihkZiR1c2VyMikKZGYkc3RhZ2UgPSBmYWN0b3IoZGYkc3RhZ2UpCmRmJGdyb3VwID0gZmFjdG9yKGRmJGdyb3VwKQpgYGAKCmBgYHtyfQojIGV4cGxvcmUgdGhlIGRhdGEgYW5kIHRoZWlyIGxldmVscyAKbGlicmFyeShwbHlyKQpkZHBseShkZiwgfiBncm91cCAqIHN0YWdlLCBmdW5jdGlvbihkYXRhKSBzdW1tYXJ5KGRhdGEkbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSkgKQpkZHBseShkZiwgfiBncm91cCAqIHN0YWdlLCBzdW1tYXJpc2UsIGxvZ19yZWxhdGlvbmFsX25vdmVsdHkubWVhbj1tZWFuKGxvZ19yZWxhdGlvbmFsX25vdmVsdHkpLCBsb2dfcmVsYXRpb25hbF9ub3ZlbHR5LnNkID0gc2QobG9nX3JlbGF0aW9uYWxfbm92ZWx0eSkpCmBgYApgYGB7cn0KIyBoaXN0b2dyYW1zIGZvciB0d28gZmFjdG9ycwpoaXN0KGRmW2RmJGdyb3VwID09IDAgJiBkZiRzdGFnZSA9PSAxLF0kbG9nX3JlbGF0aW9uYWxf
bm92ZWx0eSkKaGlzdChkZltkZiRncm91cCA9PSAwICYgZGYkc3RhZ2UgPT0gMixdJGxvZ19yZWxhdGlvbmFsX25vdmVsdHkpCmhpc3QoZGZbZGYkZ3JvdXAgPT0gMCAmIGRmJHN0YWdlID09IDMsXSRsb2dfcmVsYXRpb25hbF9ub3ZlbHR5KQpoaXN0KGRmW2RmJGdyb3VwID09IDAgJiBkZiRzdGFnZSA9PSA0LF0kbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSkKaGlzdChkZltkZiRncm91cCA9PSAxICYgZGYkc3RhZ2UgPT0gMSxdJGxvZ19yZWxhdGlvbmFsX25vdmVsdHkpCmhpc3QoZGZbZGYkZ3JvdXAgPT0gMSAmIGRmJHN0YWdlID09IDIsXSRsb2dfcmVsYXRpb25hbF9ub3ZlbHR5KQpoaXN0KGRmW2RmJGdyb3VwID09IDEgJiBkZiRzdGFnZSA9PSAzLF0kbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSkKaGlzdChkZltkZiRncm91cCA9PSAxICYgZGYkc3RhZ2UgPT0gNCxdJGxvZ19yZWxhdGlvbmFsX25vdmVsdHkpCmhpc3QoZGZbZGYkZ3JvdXAgPT0gMiAmIGRmJHN0YWdlID09IDEsXSRsb2dfcmVsYXRpb25hbF9ub3ZlbHR5KQpoaXN0KGRmW2RmJGdyb3VwID09IDIgJiBkZiRzdGFnZSA9PSAyLF0kbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSkKaGlzdChkZltkZiRncm91cCA9PSAyICYgZGYkc3RhZ2UgPT0gMyxdJGxvZ19yZWxhdGlvbmFsX25vdmVsdHkpCmhpc3QoZGZbZGYkZ3JvdXAgPT0gMiAmIGRmJHN0YWdlID09IDQsXSRsb2dfcmVsYXRpb25hbF9ub3ZlbHR5KQpoaXN0KGRmW2RmJGdyb3VwID09IDMgJiBkZiRzdGFnZSA9PSAxLF0kbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSkKaGlzdChkZltkZiRncm91cCA9PSAzICYgZGYkc3RhZ2UgPT0gMixdJGxvZ19yZWxhdGlvbmFsX25vdmVsdHkpCmhpc3QoZGZbZGYkZ3JvdXAgPT0gMyAmIGRmJHN0YWdlID09IDMsXSRsb2dfcmVsYXRpb25hbF9ub3ZlbHR5KQpoaXN0KGRmW2RmJGdyb3VwID09IDMgJiBkZiRzdGFnZSA9PSA0LF0kbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSkKYm94cGxvdChsb2dfcmVsYXRpb25hbF9ub3ZlbHR5IH4gZ3JvdXAgKiBzdGFnZSwgZGF0YSA9IGRmLCB4bGFiPSJHcm91cC5TdGFnZSIsIHlsYWI9ImxvZ19yZWxhdGlvbmFsX25vdmVsdHkiKQp3aXRoKGRmLCBpbnRlcmFjdGlvbi5wbG90KGdyb3VwLCBzdGFnZSwgbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSwgeWxpbT1jKDAsIG1heChsb2dfcmVsYXRpb25hbF9ub3ZlbHR5KSkpKSAjIGludGVyYWN0aW9uIHBsb3QKYGBgCmBgYHtyfQojIGxpYnJhcnkgZm9yIExNTSB3ZSB3aWxsIHVzZSBvbiByZWxhdGlvbmFsIG5vdmVsdHkgCgpsaWJyYXJ5KGxtZTQpCmxpYnJhcnkobG1lclRlc3QpCmxpYnJhcnkoY2FyKQpgYGAKCiMgc2V0IHN1bS10by16ZXJvIGNvbnRyYXN0cyBmb3IgdGhlIEFub3ZhIGNlbGxzIAoKYGBge3J9CmNvbnRyYXN0cyhkZiRncm91cCkgPD0gImNvbnRyLnN1bSIKY29udHJhc3RzKGRmJHN0YWdlKSA8PSAiY29udHIuc3VtIgpgYGAKCgpgYGB7cn0KIyBzdGFnZSBuZXN0ZWQgd2l0aGluIGdyb3VwIApmdWxsLm1vZGVsID0g
bG1lciggbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSB+IGdyb3VwL3N0YWdlICsgKDEgfCB1c2VyMiApLCBkYXRhID0gZGYsIFJFTUwgPSBGQUxTRSkKQW5vdmEoZnVsbC5tb2RlbCwgdHlwZT0zLCB0ZXN0LnN0YXRpc3RpY3M9IkYiKQpmdWxsLm1vZGVsCmBgYApgYGB7cn0KbGlicmFyeShwZXJmb3JtYW5jZSkKCmNoZWNrX2NvbGxpbmVhcml0eShmdWxsLm1vZGVsKQoKCmBgYAojIHZhcmlhYmlsaXR5IGlzIHZlcnkgbXVjaCBoaWdoZXIgaW4gaW5kaXZpZHVhbCB1c2VyIGFuZCB0aGFuIGluIHN0YWdlcy9waGFzZXMgZm9yIHJlbGF0aW9uYWwgbm92ZWx0eS4gVGhlIHJlbWFpbmluZyB2YXJpYWJpbGl0eSBvZiAgMC4yMDM1MDIgY29tZXMgZnJvbSBmYWN0b3Igb3RoZXIgdGhhbiBpbmRpdmlkdWFsIHVzZXJzIGFuZCBzdGFnZS4gZmFjdG9yKGdyb3VwKTEgIGhhcyBoaWdoZXIgcmVsYXRpb25hbCBub3ZlbHR5IHRoYW4gZ3JvdXAgMCBieSBhYm91dCAgMC4yMDk5IC4gZmFjdG9yKGdyb3VwKTMgaGFzIGhpZ2hlciByZWxhdGlvbmFsIG5vdmVsdHkgdGhhbiBncm91cCAwIGJ5IGFib3V0IDAuMzc2NS4gCgpgYGB7cn0KdmFyLm1vZGVsID0gbG1lciggbG9nX3JlbGF0aW9uYWxfbm92ZWx0eSB+IGZhY3Rvcihncm91cCkgKyAoIDEgfCB1c2VyMikgKyAoIDEgfCBzdGFnZSksIGRhdGEgPSBkZikKc3VtbWFyeSh2YXIubW9kZWwpCmBgYAoKCmBgYHtyfQpyZWR1Y2VkLm1vZGVsID0gbG0oIGxvZ19yZWxhdGlvbmFsX25vdmVsdHkgfiBsb2dfY291bnQgKyBRN19RN18xICsgUTdfUTdfMiArIFE4X1E4XzEgKyBRMTAsIGRhdGEgPSBkZikKc3VtbWFyeShyZWR1Y2VkLm1vZGVsKQpgYGAKCgpgYGB7cn0KZnVsbC5tb2RlbCA9IGxtKCBsb2dfcmVsYXRpb25hbF9ub3ZlbHR5IH4gZmFjdG9yKGdyb3VwKSArIGxvZ19jb3VudCArIFE3X1E3XzEgKyBRN19RN18yICsgUThfUThfMSArIFExMCwgZGF0YSA9IGRmKQpzdW1tYXJ5KGZ1bGwubW9kZWwpCmBgYAoKYGBge3J9CmFub3ZhKHJlZHVjZWQubW9kZWwsIGZ1bGwubW9kZWwpCmBgYAoKCmBgYHtyfQpib3hwbG90KGxvZ19yZWxhdGlvbmFsX25vdmVsdHl+IHN0YWdlKmdyb3VwLApjb2w9Yygid2hpdGUiLCJsaWdodGdyYXkiLCAiYmx1ZSIsICJncmVlbiIpLGRmKQpgYGAKCmBgYHtyfQpjaGVja19jb2xsaW5lYXJpdHkoZnVsbC5tb2RlbCkKYGBgCgpgYGB7cn0KbGlicmFyeShjYXIpCgp2aWYoZnVsbC5tb2RlbCkKYGBgCmBgYHtyfQp2aWYocmVkdWNlZC5tb2RlbCkKYGBgCgpgYGB7cn0KbGlicmFyeShtdWx0Y29tcCkKbGlicmFyeShsc21lYW5zKQojc3VtbWFyeShnbGh0KGZ1bGwubW9kZWwsIGxzbShwYWlyd2lzZSB+IGdyb3VwIC8gc3RhZ2UpKSwgdGVzdCA9IGFkanVzdGVkKHR5cGU9J2hvbG0nKSkKYGBgCgo=